#!/usr/bin/env bash

# Do a "ps" process status on a node-list, but exclude system processes
# Usage: psnode node-list
# Homepage: https://github.com/OleHolmNielsen/Slurm_tools/

# Exactly one argument, the node-list, is required.
if [[ $# -ne 1 ]]
then
	# Usage errors are diagnostics, so they go to stderr rather than stdout.
	echo "ERROR. Usage: $0 node-list" >&2
	# Exit statuses must lie in the range 0-255; a negative value is not valid.
	exit 1
fi

# External commands together with their options.  These are plain strings
# that are expanded unquoted where they are used, so the embedded options
# split into separate words.
# ping: send a single request (-c 1) and give up after 3 seconds (-w 3)
PING='/bin/ping -c 1 -w 3'
# ssh: take stdin from /dev/null (-n) and disable X11 forwarding (-x)
SSH='ssh -n -x'
# ps: the executable and the output columns to print, limited to 100 columns wide
PS=/bin/ps
PSFLAGS='-o pid,state,user,start,cputime,%cpu,rssize,command --columns 100'

# Accounts regarded as system users; their processes are EXCLUDED from the process list
USERLIST="root rpc rpcuser daemon ntp smmsp sshd hpsmh named dbus 68 chrony polkitd munge"

# Build a blank-separated deselect-list holding only those entries of
# USERLIST for which "getent passwd" returns an entry
deselect_list=""
for candidate in $USERLIST
do
	# No output from getent means this user is not in the passwd database
	if test -z "$(getent passwd "$candidate")"
	then
		continue
	fi
	# Insert a separating blank only when the list already has an entry
	deselect_list="${deselect_list:+${deselect_list} }${candidate}"
done

# For every node in the node-list, print its Slurm node state, the job IDs
# running on it, and its processes excluding those of the deselected users.
# The "scontrol show hostnames" command is used to expand NodeList expressions.
# The argument is quoted so that bracket expressions such as node[1-3] are
# not subjected to pathname expansion by the local shell; the command output
# is deliberately left unquoted so that it splits into one word per hostname.
for node in $(scontrol show hostnames "$@")
do
	echo "Node ${node}:"
	sinfo -N --long -n "$node"
	squeue -o "%A" -w "$node"
	# Only contact nodes that answer a ping.  Redirections are processed left
	# to right, so stdout must be sent to /dev/null first and stderr then
	# duplicated onto it in order to silence both streams.
	if $PING "$node" > /dev/null 2>&1
	then
		# ssh joins its arguments into a single remote command line, so the
		# escaped quotes keep the blank-separated user list together as one
		# argument of -u on the remote side.
		$SSH "$node" $PS $PSFLAGS --deselect -u \""${deselect_list}"\"
	else
		echo '*** WARNING ***' "Cannot ping host ${node} !"
	fi
done
